#!/usr/bin/python3

# SPDX-License-Identifier: GPL-2.0+
# Copyright (C) 2018 Oracle.  All rights reserved.
#
# Author: Darrick J. Wong <darrick.wong@oracle.com>

# Run online scrubbers in parallel, but avoid thrashing.

import subprocess
import json
import threading
import time
import sys
import os
import argparse

retcode = 0
terminate = False

def DEVNULL():
	'''Return /dev/null in subprocess writable format.'''
	try:
		from subprocess import DEVNULL
		return DEVNULL
	except ImportError:
		return open(os.devnull, 'wb')

def find_mounts():
	'''Map mountpoints to physical disks.'''
	def find_xfs_mounts(bdev, fs, lastdisk):
		'''Attach lastdisk to each fs found under bdev.'''
		if bdev['fstype'] == 'xfs' and bdev['mountpoint'] is not None:
			mnt = bdev['mountpoint']
			if mnt in fs:
				fs[mnt].add(lastdisk)
			else:
				fs[mnt] = set([lastdisk])
		if 'children' not in bdev:
			return
		for child in bdev['children']:
			find_xfs_mounts(child, fs, lastdisk)

	fs = {}
	cmd=['lsblk', '-o', 'NAME,KNAME,TYPE,FSTYPE,MOUNTPOINT', '-J']
	result = subprocess.Popen(cmd, stdout=subprocess.PIPE)
	result.wait()
	if result.returncode != 0:
		return fs
	sarray = [x.decode(sys.stdout.encoding) for x in result.stdout.readlines()]
	output = ' '.join(sarray)
	bdevdata = json.loads(output)

	# The lsblk output had better be in disks-then-partitions order
	for bdev in bdevdata['blockdevices']:
		lastdisk = bdev['kname']
		find_xfs_mounts(bdev, fs, lastdisk)

	return fs

def kill_systemd(unit, proc):
	'''Kill systemd unit.'''
	proc.terminate()
	cmd=['systemctl', 'stop', unit]
	x = subprocess.Popen(cmd)
	x.wait()

def run_killable(cmd, stdout, killfuncs, kill_fn):
	'''Run a killable program.  Returns program retcode or -1 if we can't start it.'''
	try:
		proc = subprocess.Popen(cmd, stdout = stdout)
		real_kill_fn = lambda: kill_fn(proc)
		killfuncs.add(real_kill_fn)
		proc.wait()
		try:
			killfuncs.remove(real_kill_fn)
		except:
			pass
		return proc.returncode
	except:
		return -1

# systemd doesn't like unit instance names with slashes in them, so it
# replaces them with dashes when it invokes the service.  However, it's not
# smart enough to convert the dashes to something else, so when it unescapes
# the instance name to feed to xfs_scrub, it turns all dashes into slashes.
# "/moo-cow" becomes "-moo-cow" becomes "/moo/cow", which is wrong.  systemd
# actually /can/ escape the dashes correctly if it is told that this is a path
# (and not a unit name), but it didn't do this prior to January 2017, so fix
# this for them.
#
# systemd path escaping also drops the initial slash so we add that back in so
# that log messages from the service units preserve the full path and users can
# look up log messages using full paths.  However, for "/" the escaping rules
# do /not/ drop the initial slash, so we have to special-case that here.
def systemd_escape(path):
	'''Escape a path to avoid mangled systemd mangling.'''

	if path == '/':
		return '-'
	cmd = ['systemd-escape', '--path', path]
	try:
		proc = subprocess.Popen(cmd, stdout = subprocess.PIPE)
		proc.wait()
		for line in proc.stdout:
			return '-' + line.decode(sys.stdout.encoding).strip()
	except:
		return path

def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
	'''Run a scrub process.'''
	global retcode, terminate

	print("Scrubbing %s..." % mnt)
	sys.stdout.flush()

	try:
		if terminate:
			return

		# Try it the systemd way
		cmd=['systemctl', 'start', 'xfs_scrub@%s' % systemd_escape(mnt)]
		ret = run_killable(cmd, DEVNULL(), killfuncs, \
				lambda proc: kill_systemd('xfs_scrub@%s' % mnt, proc))
		if ret == 0 or ret == 1:
			print("Scrubbing %s done, (err=%d)" % (mnt, ret))
			sys.stdout.flush()
			retcode |= ret
			return

		if terminate:
			return

		# Invoke xfs_scrub manually
		cmd=['@sbindir@/xfs_scrub', '@scrub_args@', mnt]
		ret = run_killable(cmd, None, killfuncs, \
				lambda proc: proc.terminate())
		if ret >= 0:
			print("Scrubbing %s done, (err=%d)" % (mnt, ret))
			sys.stdout.flush()
			retcode |= ret
			return

		if terminate:
			return

		print("Unable to start scrub tool.")
		sys.stdout.flush()
	finally:
		running_devs -= mntdevs
		cond.acquire()
		cond.notify()
		cond.release()

def main():
	'''Find mounts, schedule scrub runs.'''
	def thr(mnt, devs):
		a = (mnt, cond, running_devs, devs, killfuncs)
		thr = threading.Thread(target = run_scrub, args = a)
		thr.start()
	global retcode, terminate

	parser = argparse.ArgumentParser( \
			description = "Scrub all mounted XFS filesystems.")
	parser.add_argument("-V", help = "Report version and exit.", \
			action = "store_true")
	args = parser.parse_args()

	if args.V:
		print("xfs_scrub_all version @pkg_version@")
		sys.exit(0)

	fs = find_mounts()

	# Tail the journal if we ourselves aren't a service...
	journalthread = None
	if 'SERVICE_MODE' not in os.environ:
		try:
			cmd=['journalctl', '--no-pager', '-q', '-S', 'now', \
					'-f', '-u', 'xfs_scrub@*', '-o', \
					'cat']
			journalthread = subprocess.Popen(cmd)
		except:
			pass

	# Schedule scrub jobs...
	running_devs = set()
	killfuncs = set()
	cond = threading.Condition()
	while len(fs) > 0:
		if len(running_devs) == 0:
			mnt, devs = fs.popitem()
			running_devs.update(devs)
			thr(mnt, devs)
		poppers = set()
		for mnt in fs:
			devs = fs[mnt]
			can_run = True
			for dev in devs:
				if dev in running_devs:
					can_run = False
					break
			if can_run:
				running_devs.update(devs)
				poppers.add(mnt)
				thr(mnt, devs)
		for p in poppers:
			fs.pop(p)
		cond.acquire()
		try:
			cond.wait()
		except KeyboardInterrupt:
			terminate = True
			print("Terminating...")
			sys.stdout.flush()
			while len(killfuncs) > 0:
				fn = killfuncs.pop()
				fn()
			fs = []
		cond.release()

	if journalthread is not None:
		journalthread.terminate()

	# See the service mode comments in xfs_scrub.c for why we do this.
	if 'SERVICE_MODE' in os.environ:
		time.sleep(2)
		if retcode != 0:
			retcode = 1

	sys.exit(retcode)

if __name__ == '__main__':
	main()
